# Analysis date: 2020-11-18
library(tidyverse)
library(limma)
library(ggbeeswarm)
library(MultiAssayExperiment)
library(pheatmap)
library(ggpubr)
library(readxl)
library(biomartr)
library(biomaRt)
library(Rtsne)
library(pheatmap)
# Session setup. The statements below run in this order on purpose: source()
# and load() populate the global environment as a side effect, so later calls
# may overwrite objects created by earlier ones.

# Make the bare name `select` always mean dplyr's verb
# (NOTE(review): presumably another attached package masks it — confirm).
select <- dplyr::select
# Fixed RNG seed for reproducibility of any stochastic step.
set.seed(2020)
# Shared figure layout code
# (presumably defines `pp_sra`, which the plots below add — confirm).
source("Data/Figure_layouts.R")
# Both files carry an .RData extension but hold single objects read with
# readRDS(). `DIA` is the table used by every analysis below;
# `DIA_complete_formated` is only used for the TP53 presence check.
DIA <- readRDS("Robjects/DIA2_alldata.RData")
DIA_complete_formated <- readRDS("Robjects/DIA_complete_formated_v3.RData")
# Workspace images restored into the global environment
# (presumably provide `BCR_genes`, `limma_results` and `pred_DIA_PG5`,
# which are used below but not created in this script — confirm).
load("Data/CLL_Proteomics_Setup.RData")
load("Data/CLL_Proteomics_LimmaProteomics.RData")
load("Robjects/pred_DIA_PG5.RData")
# ---- Is the TP53 protein group itself present in the data? ----

# Check the DIA table used for all downstream analyses.
message("Did we measure TP53?")
## Did we measure TP53?
any(DIA[["PG.ProteinGroups"]] == "TP53")
## [1] FALSE

# Check the complete (unfiltered) table.
message("Is TP53 present in the unfiltered dataset?")
## Is TP53 present in the unfiltered dataset?
any(DIA_complete_formated[["PG.ProteinGroups"]] == "TP53")
## [1] TRUE

# List the samples in which TP53 has a non-missing quantity.
DIA_complete_formated %>%
  filter(PG.ProteinGroups == "TP53") %>%
  filter(!is.na(log.norm.MS2Quantity)) %>%
  select(Sample, Pat_ID, log.norm.MS2Quantity, cohort, TP53)
# ---- Mean abundance of the BCR gene set per sample, split by TP53 ----

# Pooled view; the Germany_1 cohort is left out here.
DIA %>%
  filter(!is.na(TP53)) %>%
  filter(PG.ProteinGroups %in% BCR_genes) %>%
  filter(cohort != "Germany_1") %>%
  group_by(cohort, Sample, `Sample ID`, Pat_ID, TP53) %>%
  summarise(mean_BCR = mean(log.norm.MS2Quantity, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(TP53 = as.factor(TP53)) %>%
  ggplot(aes(x = TP53, y = mean_BCR, group = TP53, fill = TP53)) +
  geom_boxplot() +
  geom_beeswarm() +
  stat_compare_means(method = "wilcox") +
  scale_fill_manual(values = c("#0571b0", "#ca0020", "grey")) +
  ggtitle("All DIA proteomics") +
  pp_sra
## `summarise()` regrouping output by 'cohort', 'Sample', 'Sample ID', 'Pat_ID' (override with `.groups` argument)

# Same comparison with every cohort kept, one panel per cohort.
DIA %>%
  filter(!is.na(TP53)) %>%
  filter(PG.ProteinGroups %in% BCR_genes) %>%
  group_by(cohort, Sample, `Sample ID`, Pat_ID, TP53) %>%
  summarise(mean_BCR = mean(log.norm.MS2Quantity, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(TP53 = as.factor(TP53)) %>%
  ggplot(aes(x = TP53, y = mean_BCR, group = TP53, fill = TP53)) +
  geom_boxplot() +
  geom_beeswarm() +
  stat_compare_means(method = "wilcox") +
  scale_fill_manual(values = c("#0571b0", "#ca0020", "grey")) +
  pp_sra +
  facet_wrap(~cohort)
## `summarise()` regrouping output by 'cohort', 'Sample', 'Sample ID', 'Pat_ID' (override with `.groups` argument)
# ---- BCR gene set by TP53, without samples predicted as PG5 ----
# Samples with PG5_predicted_ktsp == TRUE in `pred_DIA_PG5` are removed.

# Pooled view; the Germany_1 cohort is left out here.
DIA %>%
  filter(!is.na(TP53)) %>%
  filter(PG.ProteinGroups %in% BCR_genes) %>%
  filter(cohort != "Germany_1") %>%
  filter(
    !Sample %in% pull(filter(pred_DIA_PG5, PG5_predicted_ktsp == TRUE), Sample)
  ) %>%
  group_by(cohort, Sample, `Sample ID`, Pat_ID, TP53) %>%
  summarise(mean_BCR = mean(log.norm.MS2Quantity, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(TP53 = as.factor(TP53)) %>%
  ggplot(aes(x = TP53, y = mean_BCR, group = TP53, fill = TP53)) +
  geom_boxplot() +
  geom_beeswarm() +
  stat_compare_means(method = "wilcox") +
  scale_fill_manual(values = c("#0571b0", "#ca0020", "grey")) +
  ggtitle("All DIA proteomics, no PG5") +
  pp_sra
## `summarise()` regrouping output by 'cohort', 'Sample', 'Sample ID', 'Pat_ID' (override with `.groups` argument)

# Every cohort kept, one panel per cohort.
DIA %>%
  filter(!is.na(TP53)) %>%
  filter(PG.ProteinGroups %in% BCR_genes) %>%
  filter(
    !Sample %in% pull(filter(pred_DIA_PG5, PG5_predicted_ktsp == TRUE), Sample)
  ) %>%
  group_by(cohort, Sample, `Sample ID`, Pat_ID, TP53) %>%
  summarise(mean_BCR = mean(log.norm.MS2Quantity, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(TP53 = as.factor(TP53)) %>%
  ggplot(aes(x = TP53, y = mean_BCR, group = TP53, fill = TP53)) +
  geom_boxplot() +
  geom_beeswarm() +
  stat_compare_means(method = "wilcox") +
  scale_fill_manual(values = c("#0571b0", "#ca0020", "grey")) +
  pp_sra +
  facet_wrap(~cohort)
## `summarise()` regrouping output by 'cohort', 'Sample', 'Sample ID', 'Pat_ID' (override with `.groups` argument)
# ---- Proteins up in TP53-mutated samples (limma, fdr < 0.1) ----
# Up to 100 genes with the largest positive logFC for mut == "SNPs_TP53".
up_in_TP53_100 <- limma_results %>%
  filter(mut == "SNPs_TP53") %>%
  filter(logFC > 0, fdr < 0.1) %>%
  arrange(desc(logFC)) %>%
  dplyr::slice(1:100) %>%
  pull(gene)

# How many of these genes occur in the DIA data (Germany_1 excluded)?
message(paste(
  DIA %>%
    filter(cohort != "Germany_1") %>%
    filter(!is.na(TP53), PG.ProteinGroups %in% up_in_TP53_100) %>%
    pull(PG.ProteinGroups) %>%
    n_distinct(),
  "of the top 100 upregulated proteins found in DIA data"
))
## 42 of the top 100 upregulated proteins found in DIA data

# Per-sample mean of the gene set by TP53 status, pooled without Germany_1.
DIA %>%
  filter(cohort != "Germany_1") %>%
  filter(!is.na(TP53), PG.ProteinGroups %in% up_in_TP53_100) %>%
  group_by(cohort, Sample, `Sample ID`, Pat_ID, TP53) %>%
  summarise(mean_up = mean(log.norm.MS2Quantity, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(TP53 = as.factor(TP53)) %>%
  ggplot(aes(x = TP53, y = mean_up, group = TP53, fill = TP53)) +
  geom_boxplot() +
  geom_beeswarm() +
  stat_compare_means(method = "wilcox") +
  scale_fill_manual(values = c("#0571b0", "#ca0020", "grey")) +
  ggtitle("All DIA proteomics") +
  pp_sra
## `summarise()` regrouping output by 'cohort', 'Sample', 'Sample ID', 'Pat_ID' (override with `.groups` argument)

# Same summary for every cohort, one panel per cohort.
DIA %>%
  filter(!is.na(TP53)) %>%
  filter(PG.ProteinGroups %in% up_in_TP53_100) %>%
  group_by(cohort, Sample, `Sample ID`, Pat_ID, TP53) %>%
  summarise(mean_up = mean(log.norm.MS2Quantity, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(TP53 = as.factor(TP53)) %>%
  ggplot(aes(x = TP53, y = mean_up, group = TP53, fill = TP53)) +
  geom_boxplot() +
  geom_beeswarm() +
  stat_compare_means(method = "wilcox") +
  scale_fill_manual(values = c("#0571b0", "#ca0020", "grey")) +
  pp_sra +
  facet_wrap(~cohort)
## `summarise()` regrouping output by 'cohort', 'Sample', 'Sample ID', 'Pat_ID' (override with `.groups` argument)
# ---- Proteins down in TP53-mutated samples (limma, fdr < 0.1) ----
# Up to 100 genes with the most negative logFC for mut == "SNPs_TP53".
down_in_TP53_100 <- limma_results %>%
  filter(mut == "SNPs_TP53") %>%
  filter(logFC < 0, fdr < 0.1) %>%
  arrange(logFC) %>%
  dplyr::slice(1:100) %>%
  pull(gene)

# How many of these genes occur in the DIA data (Germany_1 excluded)?
message(paste(
  DIA %>%
    filter(cohort != "Germany_1") %>%
    filter(!is.na(TP53), PG.ProteinGroups %in% down_in_TP53_100) %>%
    pull(PG.ProteinGroups) %>%
    n_distinct(),
  "of the top 100 downregulated proteins found in DIA data"
))
## 65 of the top 100 downregulated proteins found in DIA data

# Per-sample mean of the gene set by TP53 status, pooled without Germany_1.
DIA %>%
  filter(cohort != "Germany_1") %>%
  filter(!is.na(TP53), PG.ProteinGroups %in% down_in_TP53_100) %>%
  group_by(cohort, Sample, `Sample ID`, Pat_ID, TP53) %>%
  summarise(mean_down = mean(log.norm.MS2Quantity, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(TP53 = as.factor(TP53)) %>%
  ggplot(aes(x = TP53, y = mean_down, group = TP53, fill = TP53)) +
  geom_boxplot() +
  geom_beeswarm() +
  stat_compare_means(method = "wilcox") +
  scale_fill_manual(values = c("#0571b0", "#ca0020", "grey")) +
  ggtitle("All DIA proteomics") +
  pp_sra
## `summarise()` regrouping output by 'cohort', 'Sample', 'Sample ID', 'Pat_ID' (override with `.groups` argument)

# Same summary for every cohort, one panel per cohort.
DIA %>%
  filter(!is.na(TP53)) %>%
  filter(PG.ProteinGroups %in% down_in_TP53_100) %>%
  group_by(cohort, Sample, `Sample ID`, Pat_ID, TP53) %>%
  summarise(mean_down = mean(log.norm.MS2Quantity, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(TP53 = as.factor(TP53)) %>%
  ggplot(aes(x = TP53, y = mean_down, group = TP53, fill = TP53)) +
  geom_boxplot() +
  geom_beeswarm() +
  stat_compare_means(method = "wilcox") +
  scale_fill_manual(values = c("#0571b0", "#ca0020", "grey")) +
  pp_sra +
  facet_wrap(~cohort)
## `summarise()` regrouping output by 'cohort', 'Sample', 'Sample ID', 'Pat_ID' (override with `.groups` argument)
# Stricter down-regulated set for the heatmaps: fdr < 0.05, at most 100 genes
# with the most negative logFC for mut == "SNPs_TP53".
down_in_TP53_X <- limma_results %>%
  filter(mut == "SNPs_TP53") %>%
  filter(logFC < 0, fdr < 0.05) %>%
  arrange(logFC) %>%
  dplyr::slice(1:100) %>%
  pull(gene)

# Report how many of them occur in the DIA data (Germany_1 excluded).
message(paste(
  DIA %>%
    filter(cohort != "Germany_1") %>%
    filter(!is.na(TP53), PG.ProteinGroups %in% down_in_TP53_X) %>%
    pull(PG.ProteinGroups) %>%
    n_distinct(),
  "of the top downregulated proteins found in DIA data"
))
## 39 of the top downregulated proteins found in DIA data
# Stricter up-regulated set for the heatmaps: fdr < 0.05, at most 100 genes
# with the largest positive logFC for mut == "SNPs_TP53".
up_in_TP53_X <- limma_results %>%
  filter(mut == "SNPs_TP53", logFC > 0, fdr < 0.05) %>%
  arrange(desc(logFC)) %>%
  dplyr::slice(1:100) %>%
  pull(gene)

# Report how many of them occur in the DIA data (Germany_1 excluded).
# The label previously said "downregulated" (copy-paste from the block for
# the down-regulated set) although this counts the up-regulated set.
message(paste(
  DIA %>%
    filter(
      cohort != "Germany_1",
      !is.na(TP53),
      PG.ProteinGroups %in% up_in_TP53_X
    ) %>%
    pull(PG.ProteinGroups) %>%
    n_distinct(),
  "of the top upregulated proteins found in DIA data"
))
## 28 of the top upregulated proteins found in DIA data
# ---- Heatmaps of TP53-associated proteins: all cohorts except Germany_1 ----
# Rows are the proteins in up_in_TP53_X / down_in_TP53_X, columns are samples.

# "RdBu" offers at most 11 colours. Asking brewer.pal() for 100 only raises a
# warning and returns 11, so interpolate the 11 colours to a 100-step gradient.
TP53_heat_colours <- grDevices::colorRampPalette(
  RColorBrewer::brewer.pal(11, "RdBu")
)(100)

# Protein x sample matrix of log-normalised quantities.
TP53_diff_mx <- DIA %>%
  filter(
    cohort != "Germany_1",
    !is.na(TP53),
    PG.ProteinGroups %in% c(up_in_TP53_X, down_in_TP53_X)
  ) %>%
  select(Sample, log.norm.MS2Quantity, PG.ProteinGroups) %>%
  pivot_wider(names_from = Sample, values_from = log.norm.MS2Quantity) %>%
  column_to_rownames("PG.ProteinGroups") %>%
  as.matrix()

# One annotation row per sample, with row names matching the matrix columns.
TP53_diff_ann <- DIA %>%
  filter(
    cohort != "Germany_1",
    !is.na(TP53),
    PG.ProteinGroups %in% c(up_in_TP53_X, down_in_TP53_X)
  ) %>%
  select(Sample, TP53, cohort, IGHV_mutated, trisomy12) %>%
  unique() %>%
  column_to_rownames("Sample")

# Row-scaled heatmap with clustered samples.
pheatmap(
  TP53_diff_mx,
  color = TP53_heat_colours,
  show_colnames = FALSE,
  annotation_col = TP53_diff_ann,
  scale = "row"
)

# Order samples by TP53 status and draw again without column clustering.
# drop = FALSE keeps a matrix even if only one sample remains.
TP53_diff_ann <- TP53_diff_ann %>% arrange(TP53)
TP53_diff_mx <- TP53_diff_mx[, rownames(TP53_diff_ann), drop = FALSE]
pheatmap(
  TP53_diff_mx,
  color = TP53_heat_colours,
  show_colnames = FALSE,
  scale = "row",
  annotation_col = TP53_diff_ann,
  cluster_cols = FALSE
)
# ---- Heatmaps of TP53-associated proteins: Germany_2 and Germany_3 ----
# Rows are the proteins in up_in_TP53_X / down_in_TP53_X, columns are samples.

# "RdBu" offers at most 11 colours. Asking brewer.pal() for 100 only raises a
# warning and returns 11, so interpolate the 11 colours to a 100-step gradient.
TP53_heat_colours <- grDevices::colorRampPalette(
  RColorBrewer::brewer.pal(11, "RdBu")
)(100)

# Protein x sample matrix of log-normalised quantities.
TP53_diff_mx <- DIA %>%
  filter(
    cohort %in% c("Germany_2", "Germany_3"),
    !is.na(TP53),
    PG.ProteinGroups %in% c(up_in_TP53_X, down_in_TP53_X)
  ) %>%
  select(Sample, log.norm.MS2Quantity, PG.ProteinGroups) %>%
  pivot_wider(names_from = Sample, values_from = log.norm.MS2Quantity) %>%
  column_to_rownames("PG.ProteinGroups") %>%
  as.matrix()

# One annotation row per sample (del17p13 is included for these cohorts).
TP53_diff_ann <- DIA %>%
  filter(
    cohort %in% c("Germany_2", "Germany_3"),
    !is.na(TP53),
    PG.ProteinGroups %in% c(up_in_TP53_X, down_in_TP53_X)
  ) %>%
  select(Sample, TP53, cohort, IGHV_mutated, trisomy12, del17p13) %>%
  unique() %>%
  column_to_rownames("Sample")

message("unscaled")
## unscaled
pheatmap(
  TP53_diff_mx,
  color = TP53_heat_colours,
  show_colnames = FALSE,
  annotation_col = TP53_diff_ann
)

message("row scaled")
## row scaled
pheatmap(
  TP53_diff_mx,
  color = TP53_heat_colours,
  show_colnames = FALSE,
  scale = "row",
  annotation_col = TP53_diff_ann
)

# Order samples by TP53 status and draw again without column clustering.
# drop = FALSE keeps a matrix even if only one sample remains.
TP53_diff_ann <- TP53_diff_ann %>% arrange(TP53)
TP53_diff_mx <- TP53_diff_mx[, rownames(TP53_diff_ann), drop = FALSE]
pheatmap(
  TP53_diff_mx,
  color = TP53_heat_colours,
  show_colnames = FALSE,
  scale = "row",
  annotation_col = TP53_diff_ann,
  cluster_cols = FALSE
)
# ---- Heatmaps of TP53-associated proteins: Sweden_1 ----
# Rows are the proteins in up_in_TP53_X / down_in_TP53_X, columns are samples.

# "RdBu" offers at most 11 colours. Asking brewer.pal() for 100 only raises a
# warning and returns 11, so interpolate the 11 colours to a 100-step gradient.
TP53_heat_colours <- grDevices::colorRampPalette(
  RColorBrewer::brewer.pal(11, "RdBu")
)(100)

# Protein x sample matrix of log-normalised quantities.
TP53_diff_mx <- DIA %>%
  filter(
    cohort %in% c("Sweden_1"),
    !is.na(TP53),
    PG.ProteinGroups %in% c(up_in_TP53_X, down_in_TP53_X)
  ) %>%
  select(Sample, log.norm.MS2Quantity, PG.ProteinGroups) %>%
  pivot_wider(names_from = Sample, values_from = log.norm.MS2Quantity) %>%
  column_to_rownames("PG.ProteinGroups") %>%
  as.matrix()

# One annotation row per sample, with row names matching the matrix columns.
TP53_diff_ann <- DIA %>%
  filter(
    cohort %in% c("Sweden_1"),
    !is.na(TP53),
    PG.ProteinGroups %in% c(up_in_TP53_X, down_in_TP53_X)
  ) %>%
  select(Sample, TP53, cohort, IGHV_mutated, trisomy12) %>%
  unique() %>%
  column_to_rownames("Sample")

message("unscaled")
## unscaled
pheatmap(
  TP53_diff_mx,
  color = TP53_heat_colours,
  show_colnames = FALSE,
  annotation_col = TP53_diff_ann
)

message("row scaled")
## row scaled
pheatmap(
  TP53_diff_mx,
  color = TP53_heat_colours,
  show_colnames = FALSE,
  scale = "row",
  annotation_col = TP53_diff_ann
)

# Order samples by TP53 status and draw again without column clustering.
# drop = FALSE keeps a matrix even if only one sample remains.
TP53_diff_ann <- TP53_diff_ann %>% arrange(TP53)
TP53_diff_mx <- TP53_diff_mx[, rownames(TP53_diff_ann), drop = FALSE]
pheatmap(
  TP53_diff_mx,
  color = TP53_heat_colours,
  show_colnames = FALSE,
  scale = "row",
  annotation_col = TP53_diff_ann,
  cluster_cols = FALSE
)
# ---- Heatmaps of TP53-associated proteins: High_risk ----
# Rows are the proteins in up_in_TP53_X / down_in_TP53_X, columns are samples.

# "RdBu" offers at most 11 colours. Asking brewer.pal() for 100 only raises a
# warning and returns 11, so interpolate the 11 colours to a 100-step gradient.
TP53_heat_colours <- grDevices::colorRampPalette(
  RColorBrewer::brewer.pal(11, "RdBu")
)(100)

# Protein x sample matrix of log-normalised quantities.
TP53_diff_mx <- DIA %>%
  filter(
    cohort %in% c("High_risk"),
    !is.na(TP53),
    PG.ProteinGroups %in% c(up_in_TP53_X, down_in_TP53_X)
  ) %>%
  select(Sample, log.norm.MS2Quantity, PG.ProteinGroups) %>%
  pivot_wider(names_from = Sample, values_from = log.norm.MS2Quantity) %>%
  column_to_rownames("PG.ProteinGroups") %>%
  as.matrix()

# One annotation row per sample (del17p13 is included for this cohort).
TP53_diff_ann <- DIA %>%
  filter(
    cohort %in% c("High_risk"),
    !is.na(TP53),
    PG.ProteinGroups %in% c(up_in_TP53_X, down_in_TP53_X)
  ) %>%
  select(Sample, TP53, cohort, IGHV_mutated, trisomy12, del17p13) %>%
  unique() %>%
  column_to_rownames("Sample")

message("unscaled")
## unscaled
pheatmap(
  TP53_diff_mx,
  color = TP53_heat_colours,
  show_colnames = FALSE,
  annotation_col = TP53_diff_ann
)

message("row scaled")
## row scaled
pheatmap(
  TP53_diff_mx,
  color = TP53_heat_colours,
  show_colnames = FALSE,
  scale = "row",
  annotation_col = TP53_diff_ann
)

# Order samples by TP53 status and draw again without column clustering.
# drop = FALSE keeps a matrix even if only one sample remains.
TP53_diff_ann <- TP53_diff_ann %>% arrange(TP53)
TP53_diff_mx <- TP53_diff_mx[, rownames(TP53_diff_ann), drop = FALSE]
pheatmap(
  TP53_diff_mx,
  color = TP53_heat_colours,
  show_colnames = FALSE,
  scale = "row",
  annotation_col = TP53_diff_ann,
  cluster_cols = FALSE
)
# ---- Mean abundance of the BCR gene set per sample, split by trisomy12 ----

# All cohorts pooled into one panel.
DIA %>%
  filter(!is.na(trisomy12)) %>%
  filter(PG.ProteinGroups %in% BCR_genes) %>%
  group_by(cohort, Sample, `Sample ID`, Pat_ID, trisomy12) %>%
  summarise(mean_BCR = mean(log.norm.MS2Quantity, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(trisomy12 = as.factor(trisomy12)) %>%
  ggplot(
    aes(x = trisomy12, y = mean_BCR, group = trisomy12, fill = trisomy12)
  ) +
  geom_boxplot() +
  geom_beeswarm() +
  stat_compare_means(method = "wilcox") +
  scale_fill_manual(values = c("#0571b0", "#ca0020", "grey")) +
  ggtitle("All DIA proteomics") +
  pp_sra
## `summarise()` regrouping output by 'cohort', 'Sample', 'Sample ID', 'Pat_ID' (override with `.groups` argument)

# One panel per cohort.
DIA %>%
  filter(!is.na(trisomy12)) %>%
  filter(PG.ProteinGroups %in% BCR_genes) %>%
  group_by(cohort, Sample, `Sample ID`, Pat_ID, trisomy12) %>%
  summarise(mean_BCR = mean(log.norm.MS2Quantity, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(trisomy12 = as.factor(trisomy12)) %>%
  ggplot(
    aes(x = trisomy12, y = mean_BCR, group = trisomy12, fill = trisomy12)
  ) +
  geom_boxplot() +
  geom_beeswarm() +
  stat_compare_means(method = "wilcox") +
  scale_fill_manual(values = c("#0571b0", "#ca0020", "grey")) +
  pp_sra +
  facet_wrap(~cohort)
## `summarise()` regrouping output by 'cohort', 'Sample', 'Sample ID', 'Pat_ID' (override with `.groups` argument)
# Reusable ggplot components for the chromosome profile plots; add the list to
# a plot with `+`.
plot_chromosome_theme <- list(
  # Zoom the y axis without dropping the points outside the window.
  coord_cartesian(ylim = c(-0.8, 0.8)),
  # One panel per chromosome, each with its own x range.
  facet_wrap(~ paste("chromosome", chromosome_name), scales = "free_x"),
  ylab("log2 norm. protein abundance"),
  xlab("Protein location on chromosome"),
  scale_color_manual(values = c("#0571b0", "#ca0020", "grey"))
)
# ---- Protein abundance along chromosome 12 by trisomy12 status ----
# The six plots below differ only in the cohorts kept and in the title, so
# the shared pipeline lives in one helper.

# Build the chromosome 12 profile plot for a set of cohorts.
#
# cohort_names: character vector of cohorts to keep, or NULL to keep all rows
#               regardless of cohort.
# plot_title:   title of the plot.
# Returns the ggplot object; nothing is printed.
plot_chr12_abundance <- function(cohort_names, plot_title) {
  # PG.Locus is split on ":". The first piece is discarded, the second and
  # third become chromosome_name and start_position, further pieces are
  # dropped.
  chr12 <- DIA %>%
    separate(
      col = PG.Locus,
      into = c(NA, "chromosome_name", "start_position"),
      sep = ":",
      remove = FALSE,
      extra = "drop"
    ) %>%
    filter(!is.na(log.norm.MS2Quantity), chromosome_name %in% c("12"))

  if (!is.null(cohort_names)) {
    chr12 <- filter(chr12, cohort %in% !!cohort_names)
  }

  chr12 %>%
    mutate(
      trisomy12 = as.factor(trisomy12),
      start_position = as.numeric(start_position)
    ) %>%
    ggplot(aes(start_position, log.norm.MS2Quantity, group = Sample)) +
    geom_point(size = 0.5, alpha = 0.2, color = "darkgrey") +
    # One loess curve per sample, coloured by trisomy12 status.
    stat_smooth(
      geom = "line", alpha = 0.5, se = FALSE, aes(color = trisomy12),
      span = 0.5, method = "loess"
    ) +
    plot_chromosome_theme +
    pp_sra +
    ggtitle(plot_title) +
    # Frame around the plotting window
    # (xmax is presumably the length of chromosome 12 in bp — confirm).
    geom_rect(
      xmin = 0, ymin = -0.78, ymax = 0.78, xmax = 133275309,
      color = "gray40", size = 1.5, fill = NA
    )
}

Chr12_P_plot_DIA_Germany_1 <-
  plot_chr12_abundance("Germany_1", "Germany_1 trisomy12")
Chr12_P_plot_DIA_Germany_1 +
  theme(aspect.ratio = 0.4, legend.position = "none")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 80 row(s) containing missing values (geom_path).

Chr12_P_plot_DIA_Germany_2 <-
  plot_chr12_abundance("Germany_2", "Germany_2 trisomy12")
Chr12_P_plot_DIA_Germany_2 +
  theme(aspect.ratio = 0.4, legend.position = "none")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 240 row(s) containing missing values (geom_path).

Chr12_P_plot_DIA_Germany_3 <-
  plot_chr12_abundance("Germany_3", "Germany_3 trisomy12")
Chr12_P_plot_DIA_Germany_3 +
  theme(aspect.ratio = 0.4, legend.position = "none")
## `geom_smooth()` using formula 'y ~ x'

Chr12_P_plot_DIA_Germany_2_3 <- plot_chr12_abundance(
  c("Germany_3", "Germany_2"),
  "Germany_2 and Germany_3 trisomy12"
)
Chr12_P_plot_DIA_Germany_2_3 +
  theme(aspect.ratio = 0.4, legend.position = "none")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 240 row(s) containing missing values (geom_path).

Chr12_P_plot_DIA_Sweden_1 <-
  plot_chr12_abundance("Sweden_1", "Sweden_1 trisomy12")
Chr12_P_plot_DIA_Sweden_1 +
  theme(aspect.ratio = 0.4, legend.position = "none")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 320 row(s) containing missing values (geom_path).

Chr12_P_plot_DIA_High_risk <-
  plot_chr12_abundance("High_risk", "High_risk trisomy12")
Chr12_P_plot_DIA_High_risk +
  theme(aspect.ratio = 0.4, legend.position = "none")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 480 row(s) containing missing values (geom_path).

# No cohort filter: every row on chromosome 12 is used.
Chr12_P_plot_DIA_all <- plot_chr12_abundance(NULL, "Trisomy12 all cohorts")
Chr12_P_plot_DIA_all +
  theme(aspect.ratio = 0.4, legend.position = "none")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 1120 row(s) containing missing values (geom_path).
# ---- trisomy12-associated protein sets (from limma) in the DIA data ----
# For each of six gene sets the same three steps are run: report how many of
# the genes occur in the DIA data, plot the per-sample mean of the set by
# trisomy12 status pooled without Germany_1, and plot it per cohort. The two
# helpers below hold the shared code.

# Emit "<n> <label>" as a message, where n is the number of distinct protein
# groups of `genes` present in DIA rows with known trisomy12 status outside
# the Germany_1 cohort.
tris12_report_overlap <- function(genes, label) {
  n_found <- DIA %>%
    filter(
      cohort != "Germany_1",
      !is.na(trisomy12),
      PG.ProteinGroups %in% !!genes
    ) %>%
    pull(PG.ProteinGroups) %>%
    n_distinct()
  message(paste(n_found, label))
}

# Boxplot of the per-sample mean log.norm.MS2Quantity over `genes`, split by
# trisomy12 status.
#
# genes:     character vector of protein groups to average.
# value_col: name given to the summary column ("mean_up" / "mean_down");
#            it also becomes the y-axis label.
# by_cohort: FALSE -> Germany_1 is excluded and a single panel titled
#            "All DIA proteomics" is drawn; TRUE -> all cohorts are kept and
#            the plot is faceted by cohort.
# Returns the ggplot object.
tris12_signature_boxplot <- function(genes, value_col, by_cohort) {
  value_name <- as.name(value_col)

  rows <- DIA
  if (!by_cohort) {
    rows <- filter(rows, cohort != "Germany_1")
  }

  boxes <- rows %>%
    filter(!is.na(trisomy12), PG.ProteinGroups %in% !!genes) %>%
    group_by(cohort, Sample, `Sample ID`, Pat_ID, trisomy12) %>%
    summarise(!!value_name := mean(log.norm.MS2Quantity, na.rm = TRUE)) %>%
    ungroup() %>%
    mutate(trisomy12 = as.factor(trisomy12)) %>%
    ggplot(
      aes(trisomy12, !!value_name, group = trisomy12, fill = trisomy12)
    ) +
    geom_boxplot() +
    geom_beeswarm() +
    stat_compare_means(method = "wilcox") +
    scale_fill_manual(values = c("#0571b0", "#ca0020", "grey"))

  if (by_cohort) {
    boxes + pp_sra + facet_wrap(~cohort)
  } else {
    boxes + ggtitle("All DIA proteomics") + pp_sra
  }
}

# -- Hits, up in trisomy12 --
up_in_tris12_hit <- limma_results %>%
  filter(mut == "chrom_abber_trisomy12", logFC > 0, hit_annotation == "hit") %>%
  pull(gene)
tris12_report_overlap(
  up_in_tris12_hit, "of the upregulated hits found in DIA data"
)
## 26 of the upregulated hits found in DIA data
tris12_signature_boxplot(up_in_tris12_hit, "mean_up", by_cohort = FALSE)
tris12_signature_boxplot(up_in_tris12_hit, "mean_up", by_cohort = TRUE)

# -- Hits, down in trisomy12 --
down_in_tris12_hit <- limma_results %>%
  filter(mut == "chrom_abber_trisomy12", logFC < 0, hit_annotation == "hit") %>%
  pull(gene)
tris12_report_overlap(
  down_in_tris12_hit, "of the downregulated hits found in DIA data"
)
## 4 of the downregulated hits found in DIA data
tris12_signature_boxplot(down_in_tris12_hit, "mean_down", by_cohort = FALSE)
tris12_signature_boxplot(down_in_tris12_hit, "mean_down", by_cohort = TRUE)

# -- Hits and candidates, up in trisomy12 --
up_in_tris12_cand <- limma_results %>%
  filter(
    mut == "chrom_abber_trisomy12",
    logFC > 0,
    hit_annotation %in% c("hit", "candidate")
  ) %>%
  pull(gene)
tris12_report_overlap(
  up_in_tris12_cand, "of the upregulated hits found in DIA data"
)
## 109 of the upregulated hits found in DIA data
tris12_signature_boxplot(up_in_tris12_cand, "mean_up", by_cohort = FALSE)
tris12_signature_boxplot(up_in_tris12_cand, "mean_up", by_cohort = TRUE)

# -- Hits and candidates, down in trisomy12 --
down_in_tris12_cand <- limma_results %>%
  filter(
    mut == "chrom_abber_trisomy12",
    logFC < 0,
    hit_annotation %in% c("hit", "candidate")
  ) %>%
  pull(gene)
tris12_report_overlap(
  down_in_tris12_cand, "of the downregulated hits found in DIA data"
)
## 30 of the downregulated hits found in DIA data
tris12_signature_boxplot(down_in_tris12_cand, "mean_down", by_cohort = FALSE)
tris12_signature_boxplot(down_in_tris12_cand, "mean_down", by_cohort = TRUE)

# -- Top 100 by logFC (fdr < 0.1), up in trisomy12 --
up_in_tris12_100 <- limma_results %>%
  filter(mut == "chrom_abber_trisomy12", logFC > 0, fdr < 0.1) %>%
  arrange(desc(logFC)) %>%
  dplyr::slice(1:100) %>%
  pull(gene)
tris12_report_overlap(
  up_in_tris12_100, "of the top 100 upregulated proteins found in DIA data"
)
## 37 of the top 100 upregulated proteins found in DIA data
tris12_signature_boxplot(up_in_tris12_100, "mean_up", by_cohort = FALSE)
tris12_signature_boxplot(up_in_tris12_100, "mean_up", by_cohort = TRUE)

# -- Top 100 by logFC (fdr < 0.1), down in trisomy12 --
down_in_tris12_100 <- limma_results %>%
  filter(mut == "chrom_abber_trisomy12", logFC < 0, fdr < 0.1) %>%
  arrange(logFC) %>%
  dplyr::slice(1:100) %>%
  pull(gene)
tris12_report_overlap(
  down_in_tris12_100, "of the top 100 downregulated proteins found in DIA data"
)
## 35 of the top 100 downregulated proteins found in DIA data
tris12_signature_boxplot(down_in_tris12_100, "mean_down", by_cohort = FALSE)
tris12_signature_boxplot(down_in_tris12_100, "mean_down", by_cohort = TRUE)
# ---- ZAP70 abundance by IGHV status: Germany_1 ----
DIA %>%
  filter(!is.na(IGHV_mutated)) %>%
  filter(PG.ProteinGroups == "ZAP70", cohort == "Germany_1") %>%
  mutate(IGHV_mutated = as.factor(IGHV_mutated)) %>%
  ggplot(
    aes(
      x = IGHV_mutated, y = log.norm.MS2Quantity,
      group = IGHV_mutated, fill = IGHV_mutated
    )
  ) +
  geom_boxplot() +
  geom_beeswarm() +
  stat_compare_means(method = "wilcox") +
  scale_fill_manual(values = c("#0571b0", "#ca0020", "grey")) +
  ggtitle("DIA proteomics Germany_1") +
  pp_sra

# Count the samples without a ZAP70 quantity in each IGHV group.
message("Number of NAs DIA Germany_1:")
## Number of NAs DIA Germany_1:
DIA %>%
  filter(!is.na(IGHV_mutated)) %>%
  filter(cohort == "Germany_1", PG.ProteinGroups == "ZAP70") %>%
  select(IGHV_mutated, Sample, log.norm.MS2Quantity) %>%
  unique() %>%
  group_by(IGHV_mutated) %>%
  summarise(NAs = sum(is.na(log.norm.MS2Quantity)))
## `summarise()` ungrouping output (override with `.groups` argument)
############ Germany_2
# ZAP70 abundance by IGHV status within the Germany_2 cohort.
DIA %>%
  filter(
    cohort == "Germany_2",
    PG.ProteinGroups == "ZAP70",
    !is.na(IGHV_mutated)
  ) %>%
  mutate(IGHV_mutated = as.factor(IGHV_mutated)) %>%
  ggplot(aes(
    x = IGHV_mutated, y = log.norm.MS2Quantity,
    group = IGHV_mutated, fill = IGHV_mutated
  )) +
  geom_boxplot() +
  geom_beeswarm() +
  stat_compare_means(method = "wilcox") +
  scale_fill_manual(values = c("#0571b0", "#ca0020", "grey")) +
  labs(title = "DIA proteomics Germany_2") +
  pp_sra
message("Number of NAs DIA Germany_2:")
## Number of NAs DIA Germany_2:
# Count missing ZAP70 quantifications per IGHV group (one row per distinct
# IGHV / sample / value combination).
DIA %>%
  filter(
    PG.ProteinGroups == "ZAP70",
    cohort == "Germany_2",
    !is.na(IGHV_mutated)
  ) %>%
  select(IGHV_mutated, Sample, log.norm.MS2Quantity) %>%
  unique() %>%
  group_by(IGHV_mutated) %>%
  summarise(NAs = sum(is.na(log.norm.MS2Quantity)))
## `summarise()` ungrouping output (override with `.groups` argument)
############ Germany_3
# ZAP70 abundance by IGHV status within the Germany_3 cohort.
DIA %>%
  filter(
    cohort == "Germany_3",
    PG.ProteinGroups == "ZAP70",
    !is.na(IGHV_mutated)
  ) %>%
  mutate(IGHV_mutated = as.factor(IGHV_mutated)) %>%
  ggplot(aes(
    x = IGHV_mutated, y = log.norm.MS2Quantity,
    group = IGHV_mutated, fill = IGHV_mutated
  )) +
  geom_boxplot() +
  geom_beeswarm() +
  stat_compare_means(method = "wilcox") +
  scale_fill_manual(values = c("#0571b0", "#ca0020", "grey")) +
  labs(title = "DIA proteomics Germany_3") +
  pp_sra
message("Number of NAs DIA Germany_3:")
## Number of NAs DIA Germany_3:
# Count missing ZAP70 quantifications per IGHV group (one row per distinct
# IGHV / sample / value combination).
DIA %>%
  filter(
    PG.ProteinGroups == "ZAP70",
    cohort == "Germany_3",
    !is.na(IGHV_mutated)
  ) %>%
  select(IGHV_mutated, Sample, log.norm.MS2Quantity) %>%
  unique() %>%
  group_by(IGHV_mutated) %>%
  summarise(NAs = sum(is.na(log.norm.MS2Quantity)))
## `summarise()` ungrouping output (override with `.groups` argument)
############ Sweden
# ZAP70 abundance by IGHV status within the Sweden_1 cohort.
DIA %>%
  filter(
    cohort == "Sweden_1",
    PG.ProteinGroups == "ZAP70",
    !is.na(IGHV_mutated)
  ) %>%
  mutate(IGHV_mutated = as.factor(IGHV_mutated)) %>%
  ggplot(aes(
    x = IGHV_mutated, y = log.norm.MS2Quantity,
    group = IGHV_mutated, fill = IGHV_mutated
  )) +
  geom_boxplot() +
  geom_beeswarm() +
  stat_compare_means(method = "wilcox") +
  scale_fill_manual(values = c("#0571b0", "#ca0020", "grey")) +
  labs(title = "DIA proteomics Sweden_1") +
  pp_sra
message("Number of NAs DIA Sweden_1:")
## Number of NAs DIA Sweden_1:
# Count missing ZAP70 quantifications per IGHV group (one row per distinct
# IGHV / sample / value combination).
DIA %>%
  filter(
    PG.ProteinGroups == "ZAP70",
    cohort == "Sweden_1",
    !is.na(IGHV_mutated)
  ) %>%
  select(IGHV_mutated, Sample, log.norm.MS2Quantity) %>%
  unique() %>%
  group_by(IGHV_mutated) %>%
  summarise(NAs = sum(is.na(log.norm.MS2Quantity)))
## `summarise()` ungrouping output (override with `.groups` argument)
############ High risk
# ZAP70 abundance by IGHV status within the High_risk cohort.
DIA %>%
  filter(
    cohort == "High_risk",
    PG.ProteinGroups == "ZAP70",
    !is.na(IGHV_mutated)
  ) %>%
  mutate(IGHV_mutated = as.factor(IGHV_mutated)) %>%
  ggplot(aes(
    x = IGHV_mutated, y = log.norm.MS2Quantity,
    group = IGHV_mutated, fill = IGHV_mutated
  )) +
  geom_boxplot() +
  geom_beeswarm() +
  stat_compare_means(method = "wilcox") +
  scale_fill_manual(values = c("#0571b0", "#ca0020", "grey")) +
  labs(title = "DIA proteomics High_risk") +
  pp_sra
message("Number of NAs DIA High_risk:")
## Number of NAs DIA High_risk:
# Count missing ZAP70 quantifications per IGHV group (one row per distinct
# IGHV / sample / value combination).
DIA %>%
  filter(
    PG.ProteinGroups == "ZAP70",
    cohort == "High_risk",
    !is.na(IGHV_mutated)
  ) %>%
  select(IGHV_mutated, Sample, log.norm.MS2Quantity) %>%
  unique() %>%
  group_by(IGHV_mutated) %>%
  summarise(NAs = sum(is.na(log.norm.MS2Quantity)))
## `summarise()` ungrouping output (override with `.groups` argument)
############ all
# ZAP70 abundance by IGHV status with every cohort pooled.
DIA %>%
  filter(
    PG.ProteinGroups == "ZAP70",
    !is.na(IGHV_mutated)
  ) %>%
  mutate(IGHV_mutated = as.factor(IGHV_mutated)) %>%
  ggplot(aes(
    x = IGHV_mutated, y = log.norm.MS2Quantity,
    group = IGHV_mutated, fill = IGHV_mutated
  )) +
  geom_boxplot() +
  geom_beeswarm() +
  stat_compare_means(method = "wilcox") +
  scale_fill_manual(values = c("#0571b0", "#ca0020", "grey")) +
  labs(title = "DIA proteomics all") +
  pp_sra
message("Number of NAs DIA all")
## Number of NAs DIA all
# Count missing ZAP70 quantifications per IGHV group (one row per distinct
# IGHV / sample / value combination).
DIA %>%
  filter(
    PG.ProteinGroups == "ZAP70",
    !is.na(IGHV_mutated)
  ) %>%
  select(IGHV_mutated, Sample, log.norm.MS2Quantity) %>%
  unique() %>%
  group_by(IGHV_mutated) %>%
  summarise(NAs = sum(is.na(log.norm.MS2Quantity)))
## `summarise()` ungrouping output (override with `.groups` argument)
# Per-sample mean abundance of the BCR proteins (BCR_genes), compared between
# IGHV groups with all cohorts pooled.
DIA %>%
  filter(!is.na(IGHV_mutated), PG.ProteinGroups %in% BCR_genes) %>%
  group_by(Sample, IGHV_mutated, cohort) %>%
  summarise(mean_BCR = mean(log.norm.MS2Quantity, na.rm = TRUE)) %>%
  # summarise() only drops the last grouping level, so the result is still
  # grouped by Sample and IGHV_mutated. Ungroup before recoding the grouping
  # column, as the trisomy12 plots earlier in this file do.
  ungroup() %>%
  mutate(IGHV_mutated = as.factor(IGHV_mutated)) %>%
  ggplot(aes(
    x = IGHV_mutated, y = mean_BCR,
    group = IGHV_mutated, fill = IGHV_mutated
  )) +
  geom_boxplot() +
  geom_beeswarm() +
  stat_compare_means(method = "wilcox") +
  scale_fill_manual(values = c("#0571b0", "#ca0020", "grey")) +
  ggtitle("BCR protein abundances ~ IGHV") +
  pp_sra
## `summarise()` regrouping output by 'Sample', 'IGHV_mutated' (override with `.groups` argument)
# Per-sample mean abundance of the BCR proteins (BCR_genes), compared between
# IGHV groups, one facet per cohort.
DIA %>%
  filter(!is.na(IGHV_mutated), PG.ProteinGroups %in% BCR_genes) %>%
  group_by(Sample, IGHV_mutated, cohort) %>%
  summarise(mean_BCR = mean(log.norm.MS2Quantity, na.rm = TRUE)) %>%
  # summarise() only drops the last grouping level, so the result is still
  # grouped by Sample and IGHV_mutated. Ungroup before recoding the grouping
  # column, as the trisomy12 plots earlier in this file do.
  ungroup() %>%
  mutate(IGHV_mutated = as.factor(IGHV_mutated)) %>%
  ggplot(aes(
    x = IGHV_mutated, y = mean_BCR,
    group = IGHV_mutated, fill = IGHV_mutated
  )) +
  geom_boxplot() +
  geom_beeswarm() +
  stat_compare_means(method = "wilcox") +
  scale_fill_manual(values = c("#0571b0", "#ca0020", "grey")) +
  ggtitle("BCR protein abundances ~ IGHV") +
  facet_wrap(~cohort) +
  pp_sra
## `summarise()` regrouping output by 'Sample', 'IGHV_mutated' (override with `.groups` argument)
# Per-sample mean BCR protein abundance across the IGHV x trisomy12
# combinations, Germany_1 excluded. Boxes are filled by IGHV status.
DIA %>%
  filter(
    !is.na(IGHV_mutated),
    !is.na(trisomy12),
    PG.ProteinGroups %in% BCR_genes,
    cohort != "Germany_1"
  ) %>%
  group_by(Sample, IGHV_mutated, trisomy12, cohort) %>%
  summarise(mean_BCR = mean(log.norm.MS2Quantity, na.rm = TRUE)) %>%
  # The summary is still grouped by Sample, IGHV_mutated and trisomy12;
  # ungroup before recoding those grouping columns.
  ungroup() %>%
  mutate(
    IGHV_mutated = as.factor(IGHV_mutated),
    trisomy12 = as.factor(trisomy12)
  ) %>%
  ggplot(aes(
    x = interaction(IGHV_mutated, trisomy12),
    y = mean_BCR,
    group = interaction(IGHV_mutated, trisomy12),
    fill = IGHV_mutated
  )) +
  geom_boxplot() +
  geom_beeswarm() +
  # No method is passed, so ggpubr's default test is used.
  stat_compare_means() +
  scale_fill_manual(values = c("#0571b0", "#ca0020", "grey")) +
  ggtitle("BCR protein abundances ~ IGHV + trisomy12") +
  pp_sra
## `summarise()` regrouping output by 'Sample', 'IGHV_mutated', 'trisomy12' (override with `.groups` argument)
# Per-sample mean BCR protein abundance across the IGHV x trisomy12
# combinations, all cohorts kept and shown one facet per cohort.
DIA %>%
  filter(
    !is.na(IGHV_mutated),
    !is.na(trisomy12),
    PG.ProteinGroups %in% BCR_genes
  ) %>%
  group_by(Sample, IGHV_mutated, trisomy12, cohort) %>%
  summarise(mean_BCR = mean(log.norm.MS2Quantity, na.rm = TRUE)) %>%
  # The summary is still grouped by Sample, IGHV_mutated and trisomy12;
  # ungroup before recoding those grouping columns.
  ungroup() %>%
  mutate(
    IGHV_mutated = as.factor(IGHV_mutated),
    trisomy12 = as.factor(trisomy12)
  ) %>%
  ggplot(aes(
    x = interaction(IGHV_mutated, trisomy12),
    y = mean_BCR,
    group = interaction(IGHV_mutated, trisomy12),
    fill = IGHV_mutated
  )) +
  geom_boxplot() +
  geom_beeswarm() +
  # No method is passed, so ggpubr's default test is used.
  stat_compare_means() +
  scale_fill_manual(values = c("#0571b0", "#ca0020", "grey")) +
  ggtitle("BCR protein abundances ~ IGHV + trisomy12") +
  facet_wrap(~cohort) +
  pp_sra
## `summarise()` regrouping output by 'Sample', 'IGHV_mutated', 'trisomy12' (override with `.groups` argument)
# Per-sample mean BCR protein abundance in the four trisomy12 x IGHV classes.
# Excluded: Germany_1, samples with TP53 or del17p13 different from 0
# (missing values are kept), and samples flagged PG5_predicted_ktsp in
# pred_DIA_PG5.
DIA %>%
  filter(
    !is.na(IGHV_mutated),
    !is.na(trisomy12),
    PG.ProteinGroups %in% BCR_genes,
    cohort != "Germany_1",
    (is.na(TP53) | TP53 == 0),
    (is.na(del17p13) | del17p13 == 0),
    !Sample %in% (pred_DIA_PG5 %>%
      filter(PG5_predicted_ktsp == TRUE) %>%
      .$Sample)
  ) %>%
  group_by(Sample, IGHV_mutated, trisomy12, cohort) %>%
  summarise(mean_BCR = mean(log.norm.MS2Quantity, na.rm = TRUE)) %>%
  # The summary is still grouped by Sample, IGHV_mutated and trisomy12;
  # ungroup before recoding those grouping columns.
  ungroup() %>%
  mutate(
    IGHV_mutated = as.factor(IGHV_mutated),
    trisomy12 = as.factor(trisomy12),
    # Both flags are non-missing here (filtered above); any combination
    # other than 0/1 falls through to the literal string "NA".
    `trisomy12 + IGHV` = case_when(
      trisomy12 == 1 & IGHV_mutated == 1 ~ "trisomy12 M-CLL",
      trisomy12 == 1 & IGHV_mutated == 0 ~ "trisomy12 U-CLL",
      trisomy12 == 0 & IGHV_mutated == 1 ~ "wt M-CLL",
      trisomy12 == 0 & IGHV_mutated == 0 ~ "wt U-CLL",
      TRUE ~ "NA"
    )
  ) %>%
  ggplot(aes(
    x = `trisomy12 + IGHV`, y = mean_BCR,
    group = `trisomy12 + IGHV`, fill = `trisomy12 + IGHV`
  )) +
  geom_boxplot() +
  geom_beeswarm() +
  stat_compare_means(
    method = "t.test",
    comparisons = list(
      c("wt M-CLL", "wt U-CLL"),
      c("trisomy12 U-CLL", "wt U-CLL"),
      c("trisomy12 M-CLL", "wt M-CLL")
    )
  ) +
  scale_fill_manual(values = colors_CCP[1:4]) +
  ggtitle("BCR protein abundances ~ IGHV + trisomy12 no PG5 or TP53/del17p") +
  pp_sra
## `summarise()` regrouping output by 'Sample', 'IGHV_mutated', 'trisomy12' (override with `.groups` argument)
# Per-sample mean BCR protein abundance in the four trisomy12 x IGHV classes.
# Excluded: Germany_1 and samples flagged PG5_predicted_ktsp in pred_DIA_PG5.
DIA %>%
  filter(
    !is.na(IGHV_mutated),
    !is.na(trisomy12),
    PG.ProteinGroups %in% BCR_genes,
    cohort != "Germany_1",
    !Sample %in% (pred_DIA_PG5 %>%
      filter(PG5_predicted_ktsp == TRUE) %>%
      .$Sample)
  ) %>%
  group_by(Sample, IGHV_mutated, trisomy12, cohort) %>%
  summarise(mean_BCR = mean(log.norm.MS2Quantity, na.rm = TRUE)) %>%
  # The summary is still grouped by Sample, IGHV_mutated and trisomy12;
  # ungroup before recoding those grouping columns.
  ungroup() %>%
  mutate(
    IGHV_mutated = as.factor(IGHV_mutated),
    trisomy12 = as.factor(trisomy12),
    # Both flags are non-missing here (filtered above); any combination
    # other than 0/1 falls through to the literal string "NA".
    `trisomy12 + IGHV` = case_when(
      trisomy12 == 1 & IGHV_mutated == 1 ~ "trisomy12 M-CLL",
      trisomy12 == 1 & IGHV_mutated == 0 ~ "trisomy12 U-CLL",
      trisomy12 == 0 & IGHV_mutated == 1 ~ "wt M-CLL",
      trisomy12 == 0 & IGHV_mutated == 0 ~ "wt U-CLL",
      TRUE ~ "NA"
    )
  ) %>%
  ggplot(aes(
    x = `trisomy12 + IGHV`, y = mean_BCR,
    group = `trisomy12 + IGHV`, fill = `trisomy12 + IGHV`
  )) +
  geom_boxplot() +
  geom_beeswarm() +
  stat_compare_means(
    method = "t.test",
    comparisons = list(
      c("wt M-CLL", "wt U-CLL"),
      c("trisomy12 U-CLL", "wt U-CLL"),
      c("trisomy12 M-CLL", "wt M-CLL")
    )
  ) +
  scale_fill_manual(values = colors_CCP[1:4]) +
  ggtitle("BCR protein abundances ~ IGHV + trisomy12 no PG5") +
  pp_sra
## `summarise()` regrouping output by 'Sample', 'IGHV_mutated', 'trisomy12' (override with `.groups` argument)
# Per-sample mean BCR protein abundance in the four trisomy12 x IGHV classes.
# Excluded: the Germany_1 and High_risk cohorts and samples flagged
# PG5_predicted_ktsp in pred_DIA_PG5.
DIA %>%
  filter(
    !is.na(IGHV_mutated),
    !is.na(trisomy12),
    PG.ProteinGroups %in% BCR_genes,
    cohort != "Germany_1",
    cohort != "High_risk",
    !Sample %in% (pred_DIA_PG5 %>%
      filter(PG5_predicted_ktsp == TRUE) %>%
      .$Sample)
  ) %>%
  group_by(Sample, IGHV_mutated, trisomy12, cohort) %>%
  summarise(mean_BCR = mean(log.norm.MS2Quantity, na.rm = TRUE)) %>%
  # The summary is still grouped by Sample, IGHV_mutated and trisomy12;
  # ungroup before recoding those grouping columns.
  ungroup() %>%
  mutate(
    IGHV_mutated = as.factor(IGHV_mutated),
    trisomy12 = as.factor(trisomy12),
    # Both flags are non-missing here (filtered above); any combination
    # other than 0/1 falls through to the literal string "NA".
    `trisomy12 + IGHV` = case_when(
      trisomy12 == 1 & IGHV_mutated == 1 ~ "trisomy12 M-CLL",
      trisomy12 == 1 & IGHV_mutated == 0 ~ "trisomy12 U-CLL",
      trisomy12 == 0 & IGHV_mutated == 1 ~ "wt M-CLL",
      trisomy12 == 0 & IGHV_mutated == 0 ~ "wt U-CLL",
      TRUE ~ "NA"
    )
  ) %>%
  ggplot(aes(
    x = `trisomy12 + IGHV`, y = mean_BCR,
    group = `trisomy12 + IGHV`, fill = `trisomy12 + IGHV`
  )) +
  geom_boxplot() +
  geom_beeswarm() +
  stat_compare_means(
    method = "t.test",
    comparisons = list(
      c("wt M-CLL", "wt U-CLL"),
      c("trisomy12 U-CLL", "wt U-CLL"),
      c("trisomy12 M-CLL", "wt M-CLL")
    )
  ) +
  scale_fill_manual(values = colors_CCP[1:4]) +
  ggtitle("BCR protein abundances ~ IGHV + trisomy12 no PG5 and High risk") +
  pp_sra
## `summarise()` regrouping output by 'Sample', 'IGHV_mutated', 'trisomy12' (override with `.groups` argument)
# Per-sample mean BCR protein abundance by trisomy12 status.
# Excluded: Germany_1 and samples flagged PG5_predicted_ktsp in pred_DIA_PG5.
DIA %>%
  filter(
    !is.na(trisomy12),
    PG.ProteinGroups %in% BCR_genes,
    cohort != "Germany_1",
    !Sample %in% (pred_DIA_PG5 %>%
      filter(PG5_predicted_ktsp == TRUE) %>%
      .$Sample)
  ) %>%
  group_by(Sample, trisomy12, cohort) %>%
  summarise(mean_BCR = mean(log.norm.MS2Quantity, na.rm = TRUE)) %>%
  # The summary is still grouped by Sample and trisomy12; ungroup before
  # recoding the grouping column.
  ungroup() %>%
  mutate(trisomy12 = as.factor(trisomy12)) %>%
  ggplot(aes(
    x = trisomy12, y = mean_BCR,
    group = trisomy12, fill = trisomy12
  )) +
  geom_boxplot() +
  geom_beeswarm() +
  stat_compare_means(method = "t.test") +
  scale_fill_manual(values = c("#0571b0", "#ca0020", "grey")) +
  ggtitle("BCR protein abundances ~ trisomy12 no PG5") +
  pp_sra
## `summarise()` regrouping output by 'Sample', 'trisomy12' (override with `.groups` argument)
# Per-sample mean BCR protein abundance by trisomy12 status.
# Excluded: Germany_1, samples with TP53 or del17p13 different from 0
# (missing values are kept), and samples flagged PG5_predicted_ktsp in
# pred_DIA_PG5.
DIA %>%
  filter(
    !is.na(trisomy12),
    PG.ProteinGroups %in% BCR_genes,
    cohort != "Germany_1",
    (is.na(TP53) | TP53 == 0),
    (is.na(del17p13) | del17p13 == 0),
    !Sample %in% (pred_DIA_PG5 %>%
      filter(PG5_predicted_ktsp == TRUE) %>%
      .$Sample)
  ) %>%
  group_by(Sample, trisomy12, cohort) %>%
  summarise(mean_BCR = mean(log.norm.MS2Quantity, na.rm = TRUE)) %>%
  # The summary is still grouped by Sample and trisomy12; ungroup before
  # recoding the grouping column.
  ungroup() %>%
  mutate(trisomy12 = as.factor(trisomy12)) %>%
  ggplot(aes(
    x = trisomy12, y = mean_BCR,
    group = trisomy12, fill = trisomy12
  )) +
  geom_boxplot() +
  geom_beeswarm() +
  stat_compare_means(method = "t.test") +
  scale_fill_manual(values = c("#0571b0", "#ca0020", "grey")) +
  ggtitle("BCR protein abundances ~ trisomy12 no PG5 or TP53/del17p") +
  pp_sra
## `summarise()` regrouping output by 'Sample', 'trisomy12' (override with `.groups` argument)
# Record the R version, platform and attached/loaded package versions used
# for this analysis (printed below).
sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] Rtsne_0.15 biomaRt_2.44.1
## [3] biomartr_0.9.2 readxl_1.3.1
## [5] ggpubr_0.4.0 pheatmap_1.0.12
## [7] MultiAssayExperiment_1.14.0 SummarizedExperiment_1.18.2
## [9] DelayedArray_0.14.1 matrixStats_0.56.0
## [11] Biobase_2.48.0 GenomicRanges_1.40.0
## [13] GenomeInfoDb_1.24.2 IRanges_2.22.2
## [15] S4Vectors_0.26.1 BiocGenerics_0.34.0
## [17] ggbeeswarm_0.6.0 limma_3.44.3
## [19] forcats_0.5.0 stringr_1.4.0
## [21] dplyr_1.0.2 purrr_0.3.4
## [23] readr_1.3.1 tidyr_1.1.2
## [25] tibble_3.0.3 ggplot2_3.3.2
## [27] tidyverse_1.3.0 BiocStyle_2.16.0
##
## loaded via a namespace (and not attached):
## [1] colorspace_1.4-1 ggsignif_0.6.0 ellipsis_0.3.1
## [4] rio_0.5.16 XVector_0.28.0 base64enc_0.1-3
## [7] fs_1.5.0 rstudioapi_0.11 farver_2.0.3
## [10] bit64_4.0.5 AnnotationDbi_1.50.3 fansi_0.4.1
## [13] lubridate_1.7.9 xml2_1.3.2 splines_4.0.2
## [16] knitr_1.29 jsonlite_1.7.1 broom_0.7.0
## [19] dbplyr_1.4.4 BiocManager_1.30.10 compiler_4.0.2
## [22] httr_1.4.2 backports_1.1.9 assertthat_0.2.1
## [25] Matrix_1.2-18 cli_2.0.2 htmltools_0.5.0
## [28] prettyunits_1.1.1 tools_4.0.2 gtable_0.3.0
## [31] glue_1.4.2 GenomeInfoDbData_1.2.3 rappdirs_0.3.1
## [34] Rcpp_1.0.5 carData_3.0-4 cellranger_1.1.0
## [37] vctrs_0.3.4 Biostrings_2.56.0 nlme_3.1-149
## [40] xfun_0.17 openxlsx_4.1.5 rvest_0.3.6
## [43] lifecycle_0.2.0 rstatix_0.6.0 XML_3.99-0.5
## [46] zlibbioc_1.34.0 scales_1.1.1 hms_0.5.3
## [49] RColorBrewer_1.1-2 yaml_2.2.1 curl_4.3
## [52] memoise_1.1.0 stringi_1.5.3 RSQLite_2.2.0
## [55] zip_2.1.1 rlang_0.4.7 pkgconfig_2.0.3
## [58] bitops_1.0-6 evaluate_0.14 lattice_0.20-41
## [61] labeling_0.3 bit_4.0.4 tidyselect_1.1.0
## [64] magrittr_1.5 bookdown_0.20 R6_2.4.1
## [67] magick_2.4.0 generics_0.0.2 DBI_1.1.0
## [70] mgcv_1.8-33 pillar_1.4.6 haven_2.3.1
## [73] foreign_0.8-80 withr_2.2.0 abind_1.4-5
## [76] RCurl_1.98-1.2 modelr_0.1.8 crayon_1.3.4
## [79] car_3.0-9 BiocFileCache_1.12.1 rmarkdown_2.3
## [82] progress_1.2.2 grid_4.0.2 data.table_1.13.0
## [85] blob_1.2.1 reprex_0.3.0 digest_0.6.25
## [88] openssl_1.4.2 munsell_0.5.0 beeswarm_0.2.3
## [91] vipor_0.4.5 askpass_1.1
# Ask knitr to stop processing the document at this point
# (see ?knitr::knit_exit).
knitr::knit_exit()